Aufgabe: Vorhersage der Umsätze vom 9.6.2019 bis 30.07.2019
Warengruppen: * 1 = Brot * 2 = Brötchen * 3 = Croissant * 4 = Konditorei * 5 = Kuchen * 6 = Saisonbrot
Wetterdaten: * Mittlerer Bewölkungsgrad am Tag (0 = min, 8 = max) * MIttlere Temperatur in C * Mittlere Windgeschwindigkeit in m/s * Wettercode (http://www.seewetter-kiel.de/seewetter/daten_symbole.htm) * und in der Datei wettercodes.Rda
remove(list = ls())
# Create list with needed libraries
pkgs <- c("lubridate", "stringr","tidyverse", "readr",
"fastDummies", "reticulate", "ggplot2", "Metrics", "VIM")
# Load each listed library and check if it is installed and install if necessary
for (pkg in pkgs) {
if (!require(pkg, character.only = TRUE)) {
install.packages(pkg)
library(pkg, character.only = TRUE)
}
}
# Lade Daten
load("pj_wetter_dummy.Rda")
pj_wetter <- pj_wetter_dummy
load("kiwoDT.Rda")
pj_kiwo <- kiwoDT
load("pj_umsatz.Rda")
load("schulferien.Rda")
pj_schulferien <- schulferien
# Erste Betrachtung der Daten
#summary(pj_wetter)
#summary(pj_kiwo)
#summary(pj_umsatz)
# Merge erstellt automatisch die Schnittmenge
# Der Zusatz all.x = TRUE sorgt dafür, dass keine Zeilen (basierend auf Datensatz x) weggelöscht werden
# Wetterdaten nach Datum hinzufügen
pj_umsatz_wetter <- merge(pj_umsatz, pj_wetter, by="Datum", all.x = TRUE)
# Schulferien nach Datum hinzufügen
pj_umsatz_wetter_ferien <- merge(pj_umsatz_wetter, pj_schulferien, by="Datum", all.x = TRUE)
# KiWo nach Datum hinzufügen
allData <- merge(pj_umsatz_wetter_ferien, pj_kiwo, by="Datum", all.x = TRUE)
# auf fehlende Werte überprüfen:
allData_na <- allData %>%
aggr(combined=TRUE, numbers=TRUE)
Warning: not enough horizontal space to display frequencies
# Imputation Temperatur und Windstaerke
# Aktuell: "Datenspende" vom Wert vom Vortag
# ZIEL: Mittelwert aus Temperatur von Vortag und Tag danach -> Armando! :)
allData <- allData %>%
hotdeck(variable = c("Temperatur", "Windstaerke"),
ord_var = "Datum")
#imputierte Werte graphisch überprüfen:
ggplot(allData) +
geom_point(aes(x = Datum, y = Temperatur, color = Temperatur_imp))
ggplot(allData) +
geom_point(aes(x = Datum, y = Windstaerke, color = Windstaerke_imp))
# NA Wettercodes zu 0, da Spalte WC_NA angibt, wo Wettercodes gefehlt haben
# Spalten 12 -24
# das gleiche gilt bei der Bewölkung
# Spalten 26 - 29
# weitere NA mit 0 füllen, dort wo es Sinn ergibt
allData <- allData %>%
mutate_at(c(12:34), ~replace(., is.na(.), 0))
# dummy coding der Wochentage
allData_dummy <- dummy_cols(allData, select_columns = "Wochentag")
allData_dummy$year <- year(allData_dummy$Datum)
allData_dummy$month <- month(allData_dummy$Datum)
allData_dummy$day <- day(allData_dummy$Datum)
allData_dummy$Datum <- NULL
summary(allData_dummy)
Brot Brötchen Croissant Konditorei Kuchen Saisonbrot
Min. : 23.11 Min. : 175.0 Min. : 37.74 Min. : 27.43 Min. : 121.5 Min. : 0.00
1st Qu.: 98.14 1st Qu.: 286.8 1st Qu.:106.79 1st Qu.: 66.12 1st Qu.: 231.1 1st Qu.: 0.00
Median :121.89 Median : 367.8 Median :143.90 Median : 79.67 Median : 268.2 Median : 0.00
Mean :124.40 Mean : 398.9 Mean :164.52 Mean : 87.12 Mean : 278.2 Mean : 11.02
3rd Qu.:147.06 3rd Qu.: 486.1 3rd Qu.:205.06 3rd Qu.: 97.41 3rd Qu.: 309.1 3rd Qu.: 0.00
Max. :416.79 Max. :1203.4 Max. :565.94 Max. :430.50 Max. :1879.5 Max. :172.87
Wochentag Konditorei_imp Windstaerke Temperatur WC_Bewölkung_abnehmend
Length:2123 Mode :logical Min. : 2.00 Min. :-8.475 Min. :0
Class :character FALSE:2069 1st Qu.: 5.00 1st Qu.: 6.312 1st Qu.:0
Mode :character TRUE :54 Median : 5.00 Median :11.650 Median :0
Mean : 5.58 Mean :12.031 Mean :0
3rd Qu.: 6.00 3rd Qu.:17.769 3rd Qu.:0
Max. :12.00 Max. :32.671 Max. :0
WC_Bewölkung_gleichbleibend WC_Bewölkung_nicht_beobachtet WC_Bewölkung_zunehmend WC_Dunst_Staub
Min. :0 Min. :0.00000 Min. :0.000000 Min. :0.00000
1st Qu.:0 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000
Median :0 Median :0.00000 Median :0.000000 Median :0.00000
Mean :0 Mean :0.09279 Mean :0.000471 Mean :0.06924
3rd Qu.:0 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.00000
Max. :0 Max. :1.00000 Max. :1.000000 Max. :1.00000
WC_Ereignisse_letzte_h WC_Gewitter WC_Nebel_Eisnebel WC_Regen WC_Schauer WC_Schnee
Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0 Min. :0.00000
1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0 1st Qu.:0.00000
Median :0.0000 Median :0.00000 Median :0.00000 Median :0.0000 Median :0 Median :0.00000
Mean :0.1512 Mean :0.01319 Mean :0.01413 Mean :0.3151 Mean :0 Mean :0.01978
3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0 3rd Qu.:0.00000
Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :0 Max. :1.00000
WC_Sprühregen WC_Trockenereignisse WC_NA Bewoelkungsgrad_gering Bewoelkungsgrad_keine
Min. :0.000000 Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.00
1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00
Median :0.000000 Median :0.00000 Median :0.0000 Median :0.0000 Median :0.00
Mean :0.004239 Mean :0.07772 Mean :0.2346 Mean :0.1182 Mean :0.13
3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00
Max. :1.000000 Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.00
Bewoelkungsgrad_mittel Bewoelkungsgrad_stark Bewoelkungsgrad_NA Schulferien KielerWoche
Min. :0.0000 Min. :0.0000 Min. :0.000000 Min. :0.0000 Min. :0.0000
1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000000 1st Qu.:0.0000 1st Qu.:0.0000
Median :0.0000 Median :0.0000 Median :0.000000 Median :0.0000 Median :0.0000
Mean :0.3627 Mean :0.3773 Mean :0.004239 Mean :0.2398 Mean :0.0212
3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.000000 3rd Qu.:0.0000 3rd Qu.:0.0000
Max. :1.0000 Max. :1.0000 Max. :1.000000 Max. :1.0000 Max. :1.0000
Temperatur_imp Windstaerke_imp Wochentag_Friday Wochentag_Monday Wochentag_Saturday Wochentag_Sunday
Min. :0.000000 Min. :0.000000 Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
Median :0.000000 Median :0.000000 Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
Mean :0.007536 Mean :0.007536 Mean :0.1413 Mean :0.1423 Mean :0.1441 Mean :0.1432
3rd Qu.:0.000000 3rd Qu.:0.000000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
Max. :1.000000 Max. :1.000000 Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
Wochentag_Thursday Wochentag_Tuesday Wochentag_Wednesday year month day
Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :2013 Min. : 1.00 Min. : 1.0
1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:2014 1st Qu.: 3.50 1st Qu.: 8.0
Median :0.0000 Median :0.0000 Median :0.0000 Median :2016 Median : 7.00 Median :16.0
Mean :0.1432 Mean :0.1432 Mean :0.1427 Mean :2016 Mean : 6.52 Mean :15.7
3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:2017 3rd Qu.:10.00 3rd Qu.:23.0
Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :2019 Max. :12.00 Max. :31.0
save(allData_dummy, file="projectData_dummy.Rda")
# Erstelle einen leeren Dataframe mit einer Spalte für das Datum
testDatenSatz <- data.frame(Datum = character())
# Erstelle eine Sequenz von Daten im angegebenen Zeitraum
datum_sequenz <- seq(from = as.Date("2019-06-09"),
to = as.Date("2019-07-30"),
by = "days")
# Füge die Daten der Sequenz dem Dataframe hinzu
testDatenSatz <- rbind(testDatenSatz, data.frame(Datum = datum_sequenz))
testDatenSatz$Wochentag <- weekdays(testDatenSatz$Datum)
testDatenSatz <- merge(testDatenSatz, pj_wetter, by="Datum", all.x = TRUE)
testDatenSatz <- merge(testDatenSatz, pj_schulferien, by="Datum", all.x = TRUE)
testDatenSatz <- merge(testDatenSatz, pj_kiwo, by="Datum", all.x = TRUE)
### Testdatensatz ###
testDatenSatz <- testDatenSatz %>%
hotdeck(variable = c("Temperatur", "Windstaerke"),
ord_var = "Datum")
#imputierte Werte von testDatenSatz graphisch überprüfen:
ggplot(testDatenSatz) +
geom_point(aes(x = Datum, y = Temperatur, color = Temperatur_imp))
ggplot(testDatenSatz) +
geom_point(aes(x = Datum, y = Windstaerke, color = Windstaerke_imp))
testDatenSatz <- testDatenSatz %>%
mutate_at(c(4:26), ~replace(., is.na(.), 0))
# dummy coding der Wochentage
testDatenSatz <- dummy_cols(testDatenSatz, select_columns = "Wochentag")
testDatenSatz$year <- year(testDatenSatz$Datum)
testDatenSatz$month <- month(testDatenSatz$Datum)
testDatenSatz$day <- day(testDatenSatz$Datum)
testDatenSatz$Datum <- NULL
summary(testDatenSatz)
Wochentag Windstaerke Temperatur WC_Bewölkung_abnehmend WC_Bewölkung_gleichbleibend
Length:52 Min. :3.000 Min. :14.46 Min. :0 Min. :0
Class :character 1st Qu.:5.000 1st Qu.:16.93 1st Qu.:0 1st Qu.:0
Mode :character Median :6.000 Median :19.59 Median :0 Median :0
Mean :5.788 Mean :20.41 Mean :0 Mean :0
3rd Qu.:7.000 3rd Qu.:23.40 3rd Qu.:0 3rd Qu.:0
Max. :9.000 Max. :29.73 Max. :0 Max. :0
WC_Bewölkung_nicht_beobachtet WC_Bewölkung_zunehmend WC_Dunst_Staub WC_Ereignisse_letzte_h WC_Gewitter
Min. :0.0000 Min. :0 Min. :0.0000 Min. :0.0000 Min. :0.0000
1st Qu.:0.0000 1st Qu.:0 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
Median :0.0000 Median :0 Median :0.0000 Median :0.0000 Median :0.0000
Mean :0.1538 Mean :0 Mean :0.1346 Mean :0.1538 Mean :0.1154
3rd Qu.:0.0000 3rd Qu.:0 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
Max. :1.0000 Max. :0 Max. :1.0000 Max. :1.0000 Max. :1.0000
WC_Nebel_Eisnebel WC_Regen WC_Schauer WC_Schnee WC_Sprühregen WC_Trockenereignisse WC_NA
Min. :0 Min. :0.0000 Min. :0 Min. :0 Min. :0.00000 Min. :0.00000 Min. :0.0000
1st Qu.:0 1st Qu.:0.0000 1st Qu.:0 1st Qu.:0 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
Median :0 Median :0.0000 Median :0 Median :0 Median :0.00000 Median :0.00000 Median :0.0000
Mean :0 Mean :0.1731 Mean :0 Mean :0 Mean :0.01923 Mean :0.01923 Mean :0.2308
3rd Qu.:0 3rd Qu.:0.0000 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
Max. :0 Max. :1.0000 Max. :0 Max. :0 Max. :1.00000 Max. :1.00000 Max. :1.0000
Bewoelkungsgrad_gering Bewoelkungsgrad_keine Bewoelkungsgrad_mittel Bewoelkungsgrad_stark Bewoelkungsgrad_NA
Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0
1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0
Median :0.00000 Median :0.0000 Median :0.0000 Median :0.0000 Median :0
Mean :0.05769 Mean :0.1346 Mean :0.4615 Mean :0.3462 Mean :0
3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0
Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :0
Schulferien KielerWoche Temperatur_imp Windstaerke_imp Wochentag_Friday Wochentag_Monday
Min. :0.0000 Min. :0.0000 Min. :0 Mode :logical Min. :0.0000 Min. :0.0000
1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0 FALSE:52 1st Qu.:0.0000 1st Qu.:0.0000
Median :1.0000 Median :0.0000 Median :0 Median :0.0000 Median :0.0000
Mean :0.5769 Mean :0.1731 Mean :0 Mean :0.1346 Mean :0.1538
3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0 3rd Qu.:0.0000 3rd Qu.:0.0000
Max. :1.0000 Max. :1.0000 Max. :0 Max. :1.0000 Max. :1.0000
Wochentag_Saturday Wochentag_Sunday Wochentag_Thursday Wochentag_Tuesday Wochentag_Wednesday year
Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :2019
1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:2019
Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000 Median :2019
Mean :0.1346 Mean :0.1538 Mean :0.1346 Mean :0.1538 Mean :0.1346 Mean :2019
3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:2019
Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :2019
month day
Min. :6.000 Min. : 1.00
1st Qu.:6.000 1st Qu.:11.00
Median :7.000 Median :17.50
Mean :6.577 Mean :17.19
3rd Qu.:7.000 3rd Qu.:24.00
Max. :7.000 Max. :30.00
save(testDatenSatz, file="Datenaufbereitung_Testdaten.Rda")
features <- c("day", "month", "year",
"Windstaerke", "Temperatur", "WC_Bewölkung_abnehmend",
"WC_Bewölkung_gleichbleibend", "WC_Bewölkung_nicht_beobachtet", "WC_Bewölkung_zunehmend",
"WC_Dunst_Staub", "WC_Ereignisse_letzte_h", "WC_Gewitter",
"WC_Nebel_Eisnebel", "WC_Regen", "WC_Schauer",
"WC_Schnee", "WC_Sprühregen", "WC_Trockenereignisse",
"WC_NA", "Bewoelkungsgrad_gering", "Bewoelkungsgrad_keine",
"Bewoelkungsgrad_mittel", "Bewoelkungsgrad_stark", "Bewoelkungsgrad_NA",
"Schulferien", "KielerWoche", "Wochentag_Tuesday",
"Wochentag_Thursday",
"Wochentag_Friday", "Wochentag_Wednesday", "Wochentag_Monday",
"Wochentag_Saturday", "Wochentag_Sunday")
labels <- c("Brot", "Brötchen", "Croissant", "Konditorei", "Kuchen")
# Setting the random counter to a fixed value, so the random initialization stays the same (the random split is always the same)
set.seed(1)
assignment <- sample(1:2, size = nrow(allData_dummy), prob = c(.8, .2), replace = TRUE)
# Create training, validation and test data for the features and the labels
training_features <- allData_dummy[assignment == 1, features]
training_labels <- allData_dummy[assignment == 1, labels]
training_labels <- as.data.frame(training_labels)
validation_features <- allData_dummy[assignment == 2, features]
validation_labels <- allData_dummy[assignment == 2, labels]
validation_labels <- as.data.frame(validation_labels)
testing_features <- testDatenSatz %>% select(features)
#are there any missing values?
table(is.na(training_features))
FALSE
55770
table(is.na(validation_features))
FALSE
14289
table(is.na(testing_features))
FALSE
1716
#summary(allData_dummy)
reticulate::repl_python()
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
# The argument "input_shape" for the definition of the input layer must include
# the number of input variables (features) used for the model.
# To automatically calculate this number we use the function `r.training_features.keys()`,
# which returns the list of variable names of the dataframe `training_features`.
# Then, the funtion `len()` returns the length of this list of variable names
# (i.e. the number of variables in the input)
model = Sequential([
InputLayer(input_shape = (len(r.training_features.keys()), )),
BatchNormalization(),
Dense(len(r.training_features.keys()), activation = 'swish'),
Dropout(0.2),
Dense(len(r.training_features.keys()), activation = 'swish'),
Dropout(0.2),
Dense(len(r.training_features.keys()), activation = 'swish'),
Dense(5)
])
# Ausgabe einer ZUsammenfassung zur Form des MOdells, das geschätzt wird (nicht notwendig)
#model.summary()
# definition of the loss function and the optimazation function with hyperparameters
model.compile(loss="mape", optimizer=Adam(learning_rate=0.001))
#Schätzung des Modells
history = model.fit(r.training_features, r.training_labels, epochs = 250,
validation_data = (r.validation_features, r.validation_labels), verbose = 0)
model.save("python_model.h5")
quit
# Graphische Ausgabe der Modelloptimierung
#create data
data <- data.frame(val_loss = unlist(py$history$history$val_loss),
loss = unlist(py$history$history$loss))
ggplot(data[-(1:10), ])+
geom_line(aes(x = 1:length(val_loss), y = val_loss, colour = "Validation Loss")) +
geom_line(aes(x = 1:length(loss), y = loss, colour = "Training Loss")) +
scale_colour_manual(values = c("Training Loss"="blue", "Validation Loss" = "red")) +
labs(title = "Loss Function Values During Optimazation") +
xlab("Iteration Number") +
ylab("Loss")
# Schätzung der (normierten) Preise für die Trainings- und Testdaten
training_predictions <- py$model$predict(training_features)
1/53 [..............................] - ETA: 1s
53/53 [==============================] - 0s 336us/step
validation_predictions <- py$model$predict(validation_features)
1/14 [=>............................] - ETA: 0s
14/14 [==============================] - 0s 326us/step
testing_predictions <- py$model$predict(testing_features)
1/2 [==============>...............] - ETA: 0s
2/2 [==============================] - 0s 471us/step
# Vergleich der Gütekriterien für die Traingings- und Testdaten
a <- format(mape(training_labels[,1], training_predictions[,1])*100, digits=3, nsmall=2)
b <- format(mape(training_labels[,2], training_predictions[,2])*100, digits=3, nsmall=2)
c <- format(mape(training_labels[,3], training_predictions[,3])*100, digits=3, nsmall=2)
d <- format(mape(training_labels[,4], training_predictions[,4])*100, digits=3, nsmall=2)
e <- format(mape(training_labels[,5], training_predictions[,5])*100, digits=3, nsmall=2)
cat(paste0("\nMAPE on the Training Data1:\t", a))
MAPE on the Training Data1: 17.87
cat(paste0("\nMAPE on the Training Data2:\t", b))
MAPE on the Training Data2: 11.48
cat(paste0("\nMAPE on the Training Data3:\t", c))
MAPE on the Training Data3: 17.22
cat(paste0("\nMAPE on the Training Data4:\t", d))
MAPE on the Training Data4: 20.49
cat(paste0("\nMAPE on the Training Data5:\t", e, "\n"))
MAPE on the Training Data5: 13.37
g <- format(mape(validation_labels[,1], validation_predictions[,1])*100, digits=3, nsmall=2)
h <- format(mape(validation_labels[,2], validation_predictions[,2])*100, digits=3, nsmall=2)
i <- format(mape(validation_labels[,3], validation_predictions[,3])*100, digits=3, nsmall=2)
j <- format(mape(validation_labels[,4], validation_predictions[,4])*100, digits=3, nsmall=2)
k <- format(mape(validation_labels[,5], validation_predictions[,5])*100, digits=3, nsmall=2)
cat(paste0("\nMAPE on the Validation Data1:\t", g))
MAPE on the Validation Data1: 19.26
cat(paste0("\nMAPE on the Validation Data2:\t", h))
MAPE on the Validation Data2: 13.49
cat(paste0("\nMAPE on the Validation Data3:\t", i))
MAPE on the Validation Data3: 20.83
cat(paste0("\nMAPE on the Validation Data4:\t", j))
MAPE on the Validation Data4: 20.22
cat(paste0("\nMAPE on the Validation Data5:\t", k, "\n"))
MAPE on the Validation Data5: 14.42
# Mean of Training and Validation Data MAPE
meanT <- c(as.double(a), as.double(b), as.double(c), as.double(d), as.double(e))
meanV <- c(as.double(g), as.double(h), as.double(i), as.double(j), as.double(k))
cat(paste0("\nMean Training MAPE: ", mean(meanT), "\n"))
Mean Training MAPE: 16.086
cat(paste0("Mean Validation MAPE: ", mean(meanV), "\n"))
Mean Validation MAPE: 17.644
data_train <- data.frame(prediction = training_predictions[,1], actual = training_labels[,1])
data_val <- data.frame(prediction = validation_predictions[,1], actual = validation_labels[,1])
data_test <- data.frame(prediction = testing_predictions[,1])
# Plot der Ergebnisse der Trainingsdaten
ggplot(data_train[]) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
geom_line( aes(x=1:length(actual), y=actual, colour = "Actual Values" )) +
scale_colour_manual( values = c("Predicted Values"="blue", "Actual Values"="red") ) +
labs(title="Predicted and Actual Values for the Training Data 1") +
xlab("Case Number") +
ylab("Price in EUR")
# Plot der Ergebnisse der Validierungsdaten
ggplot(data_val[,]) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
geom_line( aes(x=1:length(actual), y=actual, colour = "Actual Values" )) +
scale_colour_manual( values = c("Predicted Values"="blue", "Actual Values"="red") ) +
labs(title="Predicted and Actual Values for the Validation Data 1") +
xlab("Case Number") +
ylab("Price in EUR")
# Plot der Ergebnisse der Testdaten
ggplot(data_test) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
labs(title="Prediction for the Test Data 1") +
xlab("Case Number") +
ylab("Price in EUR")
#------------------------- 2 -------------------------#
data_train2 <- data.frame(prediction = training_predictions[,2], actual = training_labels[,2])
data_val2 <- data.frame(prediction = validation_predictions[,2], actual = validation_labels[,2])
data_test2 <- data.frame(prediction = testing_predictions[,2])
# Plot der Ergebnisse der Trainingsdaten
ggplot(data_train2) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
geom_line( aes(x=1:length(actual), y=actual, colour = "Actual Values" )) +
scale_colour_manual( values = c("Predicted Values"="blue", "Actual Values"="red") ) +
labs(title="Predicted and Actual Values for the Training Data 2") +
xlab("Case Number") +
ylab("Price in EUR")
# Plot der Ergebnisse der Validierungsdaten
ggplot(data_val2) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
geom_line( aes(x=1:length(actual), y=actual, colour = "Actual Values" )) +
scale_colour_manual( values = c("Predicted Values"="blue", "Actual Values"="red") ) +
labs(title="Predicted and Actual Values for the Validation Data 2") +
xlab("Case Number") +
ylab("Price in EUR")
# Plot der Ergebnisse der Testdaten
ggplot(data_test2) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
labs(title="Prediction for the Test Data 2") +
xlab("Case Number") +
ylab("Price in EUR")
#------------------------- 3 -------------------------#
data_train3 <- data.frame(prediction = training_predictions[,3], actual = training_labels[,3])
data_val3 <- data.frame(prediction = validation_predictions[,3], actual = validation_labels[,3])
data_test3 <- data.frame(prediction = testing_predictions[,3])
# Plot der Ergebnisse der Trainingsdaten
ggplot(data_train3) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
geom_line( aes(x=1:length(actual), y=actual, colour = "Actual Values" )) +
scale_colour_manual( values = c("Predicted Values"="blue", "Actual Values"="red") ) +
labs(title="Predicted and Actual Values for the Training Data 3") +
xlab("Case Number") +
ylab("Price in EUR")
# Plot der Ergebnisse der Validierungsdaten
ggplot(data_val3) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
geom_line( aes(x=1:length(actual), y=actual, colour = "Actual Values" )) +
scale_colour_manual( values = c("Predicted Values"="blue", "Actual Values"="red") ) +
labs(title="Predicted and Actual Values for the Validation Data 3") +
xlab("Case Number") +
ylab("Price in EUR")
# Plot der Ergebnisse der Testdaten
ggplot(data_test3) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
labs(title="Prediction for the Test Data 3") +
xlab("Case Number") +
ylab("Price in EUR")
#------------------------- 4 -------------------------#
data_train4 <- data.frame(prediction = training_predictions[,4], actual = training_labels[,4])
data_val4 <- data.frame(prediction = validation_predictions[,4], actual = validation_labels[,4])
data_test4 <- data.frame(prediction = testing_predictions[,4])
# Plot der Ergebnisse der Trainingsdaten
ggplot(data_train4) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
geom_line( aes(x=1:length(actual), y=actual, colour = "Actual Values" )) +
scale_colour_manual( values = c("Predicted Values"="blue", "Actual Values"="red") ) +
labs(title="Predicted and Actual Values for the Training Data 4") +
xlab("Case Number") +
ylab("Price in EUR")
# Plot der Ergebnisse der Validierungsdaten
ggplot(data_val4) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
geom_line( aes(x=1:length(actual), y=actual, colour = "Actual Values" )) +
scale_colour_manual( values = c("Predicted Values"="blue", "Actual Values"="red") ) +
labs(title="Predicted and Actual Values for the Validation Data 4") +
xlab("Case Number") +
ylab("Price in EUR")
# Plot der Ergebnisse der Testdaten
ggplot(data_test4) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
labs(title="Prediction for the Test Data 4") +
xlab("Case Number") +
ylab("Price in EUR")
#------------------------- 5 -------------------------#
data_train5 <- data.frame(prediction = training_predictions[,5], actual = training_labels[,5])
data_val5 <- data.frame(prediction = validation_predictions[,5], actual = validation_labels[,5])
data_test5 <- data.frame(prediction = testing_predictions[,5])
# Plot der Ergebnisse der Trainingsdaten
ggplot(data_train5) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
geom_line( aes(x=1:length(actual), y=actual, colour = "Actual Values" )) +
scale_colour_manual( values = c("Predicted Values"="blue", "Actual Values"="red") ) +
labs(title="Predicted and Actual Values for the Training Data 5") +
xlab("Case Number") +
ylab("Price in EUR")
# Plot der Ergebnisse der Validierungsdaten
ggplot(data_val5) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
geom_line( aes(x=1:length(actual), y=actual, colour = "Actual Values" )) +
scale_colour_manual( values = c("Predicted Values"="blue", "Actual Values"="red") ) +
labs(title="Predicted and Actual Values for the Validation Data 5") +
xlab("Case Number") +
ylab("Price in EUR")
# Plot der Ergebnisse der Testdaten
ggplot(data_test5) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
labs(title="Prediction for the Test Data 5") +
xlab("Case Number") +
ylab("Price in EUR")